.markers <-
    c("CD3", "CD3E", "CD14", "LYZ",
      "CCR4","CCR6","CXCR3","CXCR5",
      "ABCA1","GBP4","CDCA7L","ITM2C","NR1D1",
      "CTSH","GATA3","FHIT","CD40LG","C1orf162",
      "MGATA4A","GZMK","IFNGR2",
      "CD183","CD184","CD185","CD196","CD195","CD194",
      "RORC", "TBX21", "HLADR", "CD74", "TCF7", "LEF1",
      "SELL", "CCR7", "CCR8", "IKZF2", "TIGIT", "CD226",
      "BATF", "ANXA2", "BRD9", "HPGD", "LMNA", "TNFRSF4",
      "FOXP3", "FOXP1", "PDCD1", "CD279", "CTLA4", "LAG3",
      "HAVCR2", "CD366", "KLRB1", "FOSL2", "S100A4", "GMAP7",
      "JUN", "IL7R", "MYC", "IL32", "ISG20", "MALAT1",
      "GSDMD", "HDAC1", "GIMAP4", "APOBEC3G", "CD2", "CD28", "CD6",
      "CDKN2A", "CORO1A", "FAS", "FLI1", "GPR25", "MT2A", "KEAP1",
      "IL12RB", "SIRT2", "TNFRSF14", "TRAF3IP3", "IRF2", "PSPH",
      "CD278", "B2M", "RPS26", "MAP1S", "SGK1", "BACH2",
      "HLA-C", "HLA-B", "HLA-E", "HLA-DR", "HLA-DRA", "HLA-DRB1",
      "S1PR4", "KLF2", "SATB1", "TSC22D3", "IL2RA", "CD25") %>%
    unique
.hash.hdr <- "result/step1/hash"
.hash.data <- fileset.list(.hash.hdr)
.hash.info <- read.hash(.hash.data)
annot.dt <- fread("Tab/step2_celltype.txt.gz")

1. Memory T conventional

.full.data <- fileset.list("result/step2/matrix_final")
.mkdir("result/step4/")
.data <- fileset.list("result/step4/mtconv")

if.needed(.data, {
    .tags <- unique(annot.dt[celltype == "mTconv"]$tag)
    .data <-
        rcpp_mmutil_copy_selected_columns(.full.data$mtx,
                                          .full.data$row,
                                          .full.data$col,
                                          .tags,
                                          "result/step4/mtconv")
})

Perform outlier Q/C to detect and remove batch-specific cells

.file <- "result/step4/mtconv_svd.rds"
if.needed(.file, {
    .svd <- rcpp_mmutil_svd(.data$mtx, RANK=30, TAKE_LN=T, NUM_THREADS = 16, EM_ITER = 20)
    saveRDS(.svd, .file)
})
.svd <- readRDS(.file)

Can we identify troublesome batch-specific principal components?

Show PCs computed on surface protein data
V <- sweep(.svd$V, 2, .svd$D, `*`)
rownames(V) <- readLines(.data$col)
plots <- lapply(1:9, .plot.vd.fun, VD = .pca.df(V))
plt <- wrap_plots(plots, ncol = 3)
print(plt)

  • We can adjust systemic batch-specific effects by batch-balancing k-nearest neighbour method. This shouldn’t be much trouble.
.file <- "result/step4/mtconv_bbknn.rds"
if.needed(.file, {

    .batches <- take.batch.info(.data)

    .bbknn <-
        rcpp_mmutil_bbknn(r_svd_u = .svd$U,
                          r_svd_v = .svd$V,
                          r_svd_d = .svd$D,
                          r_batches = .batches, # batch label
                          knn = 50,             # 20 nn per batch
                          RECIPROCAL_MATCH = T, # crucial
                          NUM_THREADS = 16,
                          USE_SINGULAR_VALUES = T)

    saveRDS(.bbknn, .file)
})
.bbknn <- readRDS(.file)
VD <- .bbknn$factors.adjusted
rownames(VD) <- readLines(.data$col)
plots <- lapply(1:9, .plot.vd.fun, VD = .pca.df(VD))
plt <- wrap_plots(plots, ncol = 3)
print(plt)

A. Clustering cells by batch-balancing k-nearest neighbour graph

.file <- "Tab/step4_mtconv_leiden_raw.txt.gz"
if.needed(.file, {
    .tags <- readLines(.data$col)
    .leiden <- run.leiden(.bbknn$knn.adj, .tags, res=.7, nrepeat = 100, min.size = 100)
    fwrite(.leiden, .file)
})
.leiden.raw <- fread(.file)

Filter out small cluster cells if we have enriched specific batches

.file <- "Tab/step4_mtconv_leiden.txt.gz"
if.needed(.file, {
    .leiden <- qc.leiden(.leiden.raw, .cutoff = .75)
    fwrite(.leiden, .file)
})
.leiden <- fread(.file)    

DOWNLOAD: mTconv Leiden results

.file <- "Tab/step4_tumap_mtconv.txt.gz"
if.needed(.file, {

    set.seed(1)
    .umap <- uwot::tumap(.bbknn$factors.adjusted,
                         learning_rate=.1,
                         n_epochs=3000,
                         n_sgd_threads=16,
                         verbose=T,
                         init="spectral",
                         scale=T)

    .tags <- readLines(.data$col)

    colnames(.umap) <- "UMAP" %&% 1:ncol(.umap)

    .umap.dt <-
        data.table(.umap, tag = .tags) %>%
        left_join(.leiden) %>%
        na.omit()

    fwrite(.umap.dt, .file)
})
.umap.dt <- fread(.file)

DOWNLOAD: mTconv UMAP results

.file <- "Tab/step4_tsne_mtconv.txt.gz"
if.needed(.file, {

    .tsne <- Rtsne::Rtsne(.bbknn$factors.adjusted,
                          check_duplicates = FALSE,
                          verbose = T,
                          num_threads = 16,
                          perplexity = 100)

    .tags <- readLines(.data$col)

    colnames(.tsne$Y) <- "tSNE" %&% 1:ncol(.tsne$Y)

    .tsne.dt <- data.table(.tsne$Y, tag = .tags) %>%
        left_join(.leiden) %>%
        na.omit()

    fwrite(.tsne.dt, .file)
})
.tsne.dt <- fread(.file)

DOWNLOAD: mTconv tSNE results

B. What are the cell-cluster-specific marker genes?

.mkdir("Tab/")
.file <- "Tab/step4_mtconv_gene_stat.txt.gz"
if.needed(.file, {
    x <- bbknn.x(.data, .bbknn)
    marker.stat <- take.marker.stats(x, .leiden)
    fwrite(marker.stat, .file, sep = "\t", col.names = T)
})
marker.stat <- fread(.file, sep = "\t")

DOWNLOAD: mTconv marker gene statistics

C. Non-linear embedding to confirm the cell clusters of mTconv cells

.cells <-
    left_join(.umap.dt, .tsne.dt) %>%
    left_join(.leiden) %>%
    left_join(.hash.info) %>%
    na.omit()

.lab <-
    .cells[,
           .(UMAP1=median(UMAP1),
             UMAP2=median(UMAP2),
             tSNE1=median(tSNE1),
             tSNE2=median(tSNE2)),
           by = .(component, membership)]

.cols <- .more.colors(nrow(.lab), nc.pal=12)

p1 <-
    .gg.plot(.cells, aes(UMAP1, UMAP2, color=as.factor(membership))) +
    ggrastr::rasterise(geom_point(stroke=0, alpha=.8, size=.7), dpi=300) +
    geom_text(aes(label=membership), data=.lab, size=4, color="black") +
    scale_color_manual(values = .cols, guide="none")

p2 <-
    .gg.plot(.cells, aes(tSNE1, tSNE2, color=as.factor(membership))) +
    ggrastr::rasterise(geom_point(stroke=0, alpha=.8, size=.7), dpi=300) +
    geom_text(aes(label=membership), data=.lab, size=4, color="black") +
    scale_color_manual(values = .cols, guide="none")

plt <- p1 | p2
print(plt)

PDF

Confirm batch/individual-specific effects
.cols <- .more.colors(10, nc.pal=7, .palette="Set1")

p1 <-
    .gg.plot(.cells, aes(UMAP1, UMAP2, color=as.factor(subject))) +
    xlab("UMAP1") + ylab("UMAP2") +
    ggrastr::rasterise(geom_point(stroke=0, alpha=.8, size=.7), dpi=300) +
    scale_color_manual(values = .cols, guide="none")

p2 <-
    .gg.plot(.cells, aes(UMAP1, UMAP2, color=as.factor(subject))) +
    xlab("UMAP1") + ylab("UMAP2") +
    ggrastr::rasterise(geom_point(stroke=0, alpha=.8, size=.7), dpi=300) +
    scale_color_manual(values = .cols, guide="none")

plt <- p1 | p2
print(plt)

PDF

D. Summary heatmap

NOTE The colors are standardized log1p expression across genes and cells.

x.melt <- bbknn.x.melt(.data, .bbknn, .markers)
.dt <- x.melt %>% left_join(.cells) %>% na.omit()
.sum.subj <- .dt[, .(x = median(x)), by = .(gene, subject, membership)]
.sum.subj[, x := scale(x), by = .(gene)]
.sum <-
    .sum.subj[, .(x = median(x)), by = .(gene, membership)] %>%
    mutate(col = `gene`, row = membership, weight = x) %>%
    col.order(1:10, TRUE) %>%
    as.data.table()

plt <-
    .gg.plot(.sum, aes(row, col, fill=pmin(pmax(weight, -1.5), 1.5)))+
    geom_tile(linewidth=.1, color="black") +
    scale_fill_distiller("", palette = "RdBu", direction = -1) +
    theme(legend.key.width = unit(.2,"lines")) +
    theme(legend.key.height = unit(.5,"lines")) +
    xlab("cell clusters") + ylab("features")
print(plt)

PDF

.dt <- copy(.sum.subj) %>%
    mutate(gene = factor(`gene`, .marker.order)) %>%
    mutate(t = subject %&% "." %&% membership)

plt <-
    .gg.plot(.dt, aes(`t`, `gene`, fill=pmin(pmax(`x`, -1.5), 1.5))) +
    facet_grid(. ~ membership, space="free", scales="free")+
    geom_tile(linewidth=.1, color="black") +
    scale_fill_distiller("", palette = "RdBu", direction = -1) +
    theme(legend.key.width = unit(.2,"lines")) +
    theme(legend.key.height = unit(.5,"lines")) +
    theme(axis.ticks.x = element_blank()) +
    theme(axis.text.x = element_blank()) +
    xlab("subjects") + ylab("features")

print(plt)

PDF

UMAP for each marker gene (normalized expression)

for(g in unique(x.melt$gene)) {
    .dt <- left_join(x.melt[gene == g], .cells)
    .aes <- aes(UMAP1, UMAP2, color=pmax(pmin(x, 3), -3))

    plt <-
        .gg.plot(.dt[order(`x`)], .aes) +
        xlab("UMAP1") + ylab("UMAP2") +
        ggrastr::rasterise(geom_point(stroke = 0, size=.7), dpi=300) +
        theme(legend.key.width = unit(.2,"lines")) +
        theme(legend.key.height = unit(.5,"lines")) +
        scale_color_distiller(g, palette = "RdBu", direction = -1) +
        ggtitle(g)

    .file <- fig.dir %&% "/Fig_mtconv_gene_umap_" %&% g %&% ".pdf"
    .gg.save(filename = .file, plot = plt, width=3, height=2.5, cat.link = F)
    cat("[" %&% g %&% "](" %&% .file %&% ") ")
}

CD14 CD183 CD184 CD185 CD194 CD195 CD196 CD226 CD25 CD278 CD279 CD366 CD3 IL32 TRAF3IP3 CD6 CD74 FAS BRD9 IKZF2 FOXP3 SIRT2 TBX21 FOSL2 KEAP1 TCF7 LAG3 LYZ CD40LG CORO1A CTSH GSDMD GATA3 KLRB1 BACH2 CCR6 GZMK FOXP1 HDAC1 CD2 SGK1 MT2A S1PR4 CCR7 NR1D1 KLF2 MAP1S GIMAP4 IL2RA HAVCR2 ITM2C MYC LEF1 C1orf162 RORC PSPH CDKN2A FLI1 BATF TSC22D3 TNFRSF14 IFNGR2 CXCR5 LMNA GBP4 CTLA4 HPGD CDCA7L ABCA1 B2M IRF2 IL7R GPR25 ISG20 JUN CD28 CCR8 TIGIT SATB1 ANXA2 CCR4 CXCR3 TNFRSF4 PDCD1 SELL FHIT HLA-DRB1 S100A4 RPS26 CD3E HLA-DRA HLA-C HLA-E HLA-B APOBEC3G MALAT1 HLA-DR

tSNE for each marker gene (normalized expression)

for(g in unique(x.melt$gene)) {
    .dt <- left_join(x.melt[gene == g], .cells)
    .aes <- aes(tSNE1, tSNE2, color=pmax(pmin(x, 3), -3))

    plt <-
        .gg.plot(.dt[order(`x`)], .aes) +
        xlab("TSNE1") + ylab("TSNE2") +
        ggrastr::rasterise(geom_point(stroke = 0, size=.7), dpi=300) +
        theme(legend.key.width = unit(.2,"lines")) +
        theme(legend.key.height = unit(.5,"lines")) +
        scale_color_distiller(g, palette = "RdBu", direction = -1) +
        ggtitle(g)

    .file <- fig.dir %&% "/Fig_mtconv_gene_tsne_" %&% g %&% ".pdf"
    .gg.save(filename = .file, plot = plt, width=3, height=2.5, cat.link = F)
    cat("[" %&% g %&% "](" %&% .file %&% ") ")
}

CD14 CD183 CD184 CD185 CD194 CD195 CD196 CD226 CD25 CD278 CD279 CD366 CD3 IL32 TRAF3IP3 CD6 CD74 FAS BRD9 IKZF2 FOXP3 SIRT2 TBX21 FOSL2 KEAP1 TCF7 LAG3 LYZ CD40LG CORO1A CTSH GSDMD GATA3 KLRB1 BACH2 CCR6 GZMK FOXP1 HDAC1 CD2 SGK1 MT2A S1PR4 CCR7 NR1D1 KLF2 MAP1S GIMAP4 IL2RA HAVCR2 ITM2C MYC LEF1 C1orf162 RORC PSPH CDKN2A FLI1 BATF TSC22D3 TNFRSF14 IFNGR2 CXCR5 LMNA GBP4 CTLA4 HPGD CDCA7L ABCA1 B2M IRF2 IL7R GPR25 ISG20 JUN CD28 CCR8 TIGIT SATB1 ANXA2 CCR4 CXCR3 TNFRSF4 PDCD1 SELL FHIT HLA-DRB1 S100A4 RPS26 CD3E HLA-DRA HLA-C HLA-E HLA-B APOBEC3G MALAT1 HLA-DR

E. Basic statistics

.stat <-
    .cells[,
           .(N = .N),
           by=.(batch, membership, component, disease)] %>%
    mutate(membership = as.factor(membership)) %>% 
    .sum.stat.batch()

plt <- .plt.sum.stat(.stat) + ggtitle("mTconv")
print(plt)

PDF

.stat.tot <-
    .cells[,
           .(N = .N),
           by=.(membership, disease)] %>%
    mutate(membership = as.factor(membership)) %>% 
    .sum.stat.tot() %>%
    mutate(batch = "(N=" %&% num.int(sum(.stat$N)) %&% ")")

plt <- .plt.sum.stat(.stat.tot) + ggtitle("mTconv")
print(plt)

PDF

2. Memory Treg cells

.full.data <- fileset.list("result/step2/matrix_final")
.mkdir("result/step4/")
.data <- fileset.list("result/step4/mtreg")

if.needed(.data, {
    .tags <- unique(annot.dt[celltype == "mTreg"]$tag)
    .data <-
        rcpp_mmutil_copy_selected_columns(.full.data$mtx,
                                          .full.data$row,
                                          .full.data$col,
                                          .tags,
                                          "result/step4/mtreg")
})

Perform outlier Q/C to detect and remove batch-specific cells

.file <- "result/step4/mtreg_svd.rds"
if.needed(.file, {
    .svd <- rcpp_mmutil_svd(.data$mtx, RANK=30, TAKE_LN=T, NUM_THREADS = 16, EM_ITER = 20)
    saveRDS(.svd, .file)
})
.svd <- readRDS(.file)

Can we identify troublesome batch-specific principal components?

Show PCs computed on surface protein data
V <- sweep(.svd$V, 2, .svd$D, `*`)
rownames(V) <- readLines(.data$col)
plots <- lapply(1:9, .plot.vd.fun, VD = .pca.df(V))
plt <- wrap_plots(plots, ncol = 3)
print(plt)

  • We can adjust systemic batch-specific effects by batch-balancing k-nearest neighbour method. This shouldn’t be much trouble.
.file <- "result/step4/mtreg_bbknn.rds"
if.needed(.file, {

    .batches <- take.batch.info(.data)

    .bbknn <-
        rcpp_mmutil_bbknn(r_svd_u = .svd$U,
                          r_svd_v = .svd$V,
                          r_svd_d = .svd$D,
                          r_batches = .batches, # batch label
                          knn = 50,             # 20 nn per batch
                          RECIPROCAL_MATCH = T, # crucial
                          NUM_THREADS = 16,
                          USE_SINGULAR_VALUES = T)

    saveRDS(.bbknn, .file)
})
.bbknn <- readRDS(.file)
VD <- .bbknn$factors.adjusted
rownames(VD) <- readLines(.data$col)
plots <- lapply(1:9, .plot.vd.fun, VD = .pca.df(VD))
plt <- wrap_plots(plots, ncol = 3)
print(plt)

A. Clustering cells by batch-balancing k-nearest neighbour graph

.file <- "Tab/step4_mtreg_leiden_raw.txt.gz"
if.needed(.file, {
    .tags <- readLines(.data$col)
    .leiden <- run.leiden(.bbknn$knn.adj, .tags, res=.7, nrepeat = 100, min.size = 100)
    fwrite(.leiden, .file)
})
.leiden.raw <- fread(.file)

Filter out small cluster cells if we have enriched specific batches

.file <- "Tab/step4_mtreg_leiden.txt.gz"
if.needed(.file, {
    .leiden <- qc.leiden(.leiden.raw, .cutoff = .75)
    fwrite(.leiden, .file)
})
.leiden <- fread(.file)    

DOWNLOAD: mTreg Leiden results

.file <- "Tab/step4_tumap_mtreg.txt.gz"
if.needed(.file, {

    set.seed(1)
    .umap <- uwot::tumap(.bbknn$factors.adjusted,
                         learning_rate=.1,
                         n_epochs=3000,
                         n_sgd_threads=16,
                         verbose=T,
                         init="spectral",
                         scale=T)

    .tags <- readLines(.data$col)

    colnames(.umap) <- "UMAP" %&% 1:ncol(.umap)

    .umap.dt <-
        data.table(.umap, tag = .tags) %>%
        left_join(.leiden) %>%
        na.omit()

    fwrite(.umap.dt, .file)
})
.umap.dt <- fread(.file)

DOWNLOAD: mtreg UMAP results

.file <- "Tab/step4_tsne_mtreg.txt.gz"
if.needed(.file, {

    .tsne <- Rtsne::Rtsne(.bbknn$factors.adjusted,
                          check_duplicates = FALSE,
                          verbose = T,
                          num_threads = 16)

    .tags <- readLines(.data$col)

    colnames(.tsne$Y) <- "tSNE" %&% 1:ncol(.tsne$Y)

    .tsne.dt <- data.table(.tsne$Y, tag = .tags) %>%
        left_join(.leiden) %>%
        na.omit()

    fwrite(.tsne.dt, .file)
})
.tsne.dt <- fread(.file)

DOWNLOAD: mtreg tSNE results

B. What are the cell-cluster-specific marker genes?

.mkdir("Tab/")
.file <- "Tab/step4_mtreg_gene_stat.txt.gz"
if.needed(.file, {
    x <- bbknn.x(.data, .bbknn)
    marker.stat <- take.marker.stats(x, .leiden)
    fwrite(marker.stat, .file, sep = "\t", col.names = T)
})
marker.stat <- fread(.file, sep = "\t")

DOWNLOAD: mTreg marker gene statistics

C. Non-linear embedding to confirm the cell clusters of mtreg cells

.cells <-
    left_join(.umap.dt, .tsne.dt) %>%
    left_join(.leiden) %>%
    left_join(.hash.info) %>%
    na.omit()

.lab <-
    .cells[,
           .(UMAP1=median(UMAP1),
             UMAP2=median(UMAP2),
             tSNE1=median(tSNE1),
             tSNE2=median(tSNE2)),
           by = .(component, membership)]

.cols <- .more.colors(nrow(.lab), nc.pal=12)

p1 <-
    .gg.plot(.cells, aes(UMAP1, UMAP2, color=as.factor(membership))) +
    ggrastr::rasterise(geom_point(stroke=0, alpha=.8, size=.7), dpi=300) +
    geom_text(aes(label=membership), data=.lab, size=4, color="black") +
    scale_color_manual(values = .cols, guide="none")

p2 <-
    .gg.plot(.cells, aes(tSNE1, tSNE2, color=as.factor(membership))) +
    ggrastr::rasterise(geom_point(stroke=0, alpha=.8, size=.7), dpi=300) +
    geom_text(aes(label=membership), data=.lab, size=4, color="black") +
    scale_color_manual(values = .cols, guide="none")

plt <- p1 | p2
print(plt)

PDF

Confirm batch/individual-specific effects
.cols <- .more.colors(10, nc.pal=7, .palette="Set1")

p1 <-
    .gg.plot(.cells, aes(UMAP1, UMAP2, color=as.factor(subject))) +
    xlab("UMAP1") + ylab("UMAP2") +
    ggrastr::rasterise(geom_point(stroke=0, alpha=.8, size=.7), dpi=300) +
    scale_color_manual(values = .cols, guide="none")

p2 <-
    .gg.plot(.cells, aes(UMAP1, UMAP2, color=as.factor(subject))) +
    xlab("UMAP1") + ylab("UMAP2") +
    ggrastr::rasterise(geom_point(stroke=0, alpha=.8, size=.7), dpi=300) +
    scale_color_manual(values = .cols, guide="none")

plt <- p1 | p2
print(plt)

PDF

D. Summary heatmap

NOTE The colors are standardized log1p expression across genes and cells.

x.melt <- bbknn.x.melt(.data, .bbknn, .markers)
.dt <- x.melt %>% left_join(.cells) %>% na.omit()
.sum.subj <- .dt[, .(x = median(x)), by = .(gene, subject, membership)]
.sum.subj[, x := scale(x), by = .(gene)]
.sum <-
    .sum.subj[, .(x = median(x)), by = .(gene, membership)] %>%
    mutate(col = `gene`, row = membership, weight = x) %>%
    col.order(1:15, TRUE) %>%
    as.data.table()

plt <-
    .gg.plot(.sum, aes(row, col, fill=pmin(pmax(weight, -1.5), 1.5)))+
    geom_tile(linewidth=.1, color="black") +
    scale_fill_distiller("", palette = "RdBu", direction = -1) +
    theme(legend.key.width = unit(.2,"lines")) +
    theme(legend.key.height = unit(.5,"lines")) +
    xlab("cell clusters") + ylab("features")
print(plt)

PDF

.dt <- copy(.sum.subj) %>%
    mutate(gene = factor(`gene`, .marker.order)) %>%
    mutate(t = subject %&% "." %&% membership)

plt <-
    .gg.plot(.dt, aes(`t`, `gene`, fill=pmin(pmax(`x`, -1.5), 1.5))) +
    facet_grid(. ~ membership, space="free", scales="free")+
    geom_tile(linewidth=.1, color="black") +
    scale_fill_distiller("", palette = "RdBu", direction = -1) +
    theme(legend.key.width = unit(.2,"lines")) +
    theme(legend.key.height = unit(.5,"lines")) +
    theme(axis.ticks.x = element_blank()) +
    theme(axis.text.x = element_blank()) +
    xlab("subjects") + ylab("features")

print(plt)

PDF

UMAP for each marker gene

for(g in unique(x.melt$gene)) {
    .dt <- left_join(x.melt[gene == g], .cells)
    .aes <- aes(UMAP1, UMAP2, color=pmax(pmin(x, 3), -3))

    plt <-
        .gg.plot(.dt[order(`x`)], .aes) +
        xlab("UMAP1") + ylab("UMAP2") +
        ggrastr::rasterise(geom_point(stroke = 0, size=.7), dpi=300) +
        theme(legend.key.width = unit(.2,"lines")) +
        theme(legend.key.height = unit(.5,"lines")) +
        scale_color_distiller(g, palette = "RdBu", direction = -1) +
        ggtitle(g)

    .file <- fig.dir %&% "/Fig_mtreg_gene_umap_" %&% g %&% ".pdf"
    .gg.save(filename = .file, plot = plt, width=3, height=2.5, cat.link = F)
    cat("[" %&% g %&% "](" %&% .file %&% ") ")
}

CD14 CD183 CD184 CD185 CD194 CD195 CD196 CD226 CD25 CD278 CD279 CD366 CD3 IL32 TRAF3IP3 CD6 CD74 FAS BRD9 IKZF2 FOXP3 SIRT2 TBX21 FOSL2 KEAP1 TCF7 LAG3 LYZ CD40LG CORO1A CTSH GSDMD GATA3 KLRB1 BACH2 CCR6 GZMK FOXP1 HDAC1 CD2 SGK1 MT2A S1PR4 CCR7 NR1D1 KLF2 MAP1S GIMAP4 IL2RA HAVCR2 ITM2C MYC LEF1 C1orf162 RORC PSPH CDKN2A FLI1 BATF TSC22D3 TNFRSF14 IFNGR2 CXCR5 LMNA GBP4 CTLA4 HPGD CDCA7L ABCA1 B2M IRF2 IL7R GPR25 ISG20 JUN CD28 CCR8 TIGIT SATB1 ANXA2 CCR4 CXCR3 TNFRSF4 PDCD1 SELL FHIT HLA-DRB1 S100A4 RPS26 CD3E HLA-DRA HLA-C HLA-E HLA-B APOBEC3G MALAT1 HLA-DR

tSNE for each marker gene

for(g in unique(x.melt$gene)) {
    .dt <- left_join(x.melt[gene == g], .cells)
    .aes <- aes(tSNE1, tSNE2, color=pmax(pmin(x, 3), -3))

    plt <-
        .gg.plot(.dt[order(`x`)], .aes) +
        xlab("TSNE1") + ylab("TSNE2") +
        ggrastr::rasterise(geom_point(stroke = 0, size=.7), dpi=300) +
        theme(legend.key.width = unit(.2,"lines")) +
        theme(legend.key.height = unit(.5,"lines")) +
        scale_color_distiller(g, palette = "RdBu", direction = -1) +
        ggtitle(g)

    .file <- fig.dir %&% "/Fig_mtreg_gene_tsne_" %&% g %&% ".pdf"
    .gg.save(filename = .file, plot = plt, width=3, height=2.5, cat.link = F)
    cat("[" %&% g %&% "](" %&% .file %&% ") ")
}

CD14 CD183 CD184 CD185 CD194 CD195 CD196 CD226 CD25 CD278 CD279 CD366 CD3 IL32 TRAF3IP3 CD6 CD74 FAS BRD9 IKZF2 FOXP3 SIRT2 TBX21 FOSL2 KEAP1 TCF7 LAG3 LYZ CD40LG CORO1A CTSH GSDMD GATA3 KLRB1 BACH2 CCR6 GZMK FOXP1 HDAC1 CD2 SGK1 MT2A S1PR4 CCR7 NR1D1 KLF2 MAP1S GIMAP4 IL2RA HAVCR2 ITM2C MYC LEF1 C1orf162 RORC PSPH CDKN2A FLI1 BATF TSC22D3 TNFRSF14 IFNGR2 CXCR5 LMNA GBP4 CTLA4 HPGD CDCA7L ABCA1 B2M IRF2 IL7R GPR25 ISG20 JUN CD28 CCR8 TIGIT SATB1 ANXA2 CCR4 CXCR3 TNFRSF4 PDCD1 SELL FHIT HLA-DRB1 S100A4 RPS26 CD3E HLA-DRA HLA-C HLA-E HLA-B APOBEC3G MALAT1 HLA-DR

E. Basic statistics

.stat <-
    .cells[,
           .(N = .N),
           by=.(batch, membership, component, disease)] %>%
    mutate(membership = as.factor(membership)) %>% 
    .sum.stat.batch()

plt <- .plt.sum.stat(.stat) + ggtitle("mtreg")
print(plt)

PDF

.stat.tot <-
    .cells[,
           .(N = .N),
           by=.(membership, disease)] %>%
    mutate(membership = as.factor(membership)) %>% 
    .sum.stat.tot() %>%
    mutate(batch = "(N=" %&% num.int(sum(.stat$N)) %&% ")")

plt <- .plt.sum.stat(.stat.tot) + ggtitle("mtreg")
print(plt)

PDF